clear  all
set maxvar 10000
set more off

* ================================================
* written by Byungkyu Lee (bl2474@columbia.edu)
* Date : 10/27/16
* Purpose: prepare the analytic data sets used to reproduce all figures and tables in
* Lee, Byungkyu and Peter Bearman, 2016, "Important Matters in Political Context", Sociological Science
* ================================================

* set the working directory on your computer (add the path to the data folder after `cd` below)
cd 

* ================================================
* GSS 1985, 1987, 2004
* ================================================

* Load the GSS cross-sectional data and build a daily interview date.
use GSS7214_R4, clear

* ----------- survey timing
* dateintv is converted to text and split by length: a 3-character value is
* read as M-DD, a 4-character value as MM-DD. Other lengths give no month/day.
recode dateintv (missing=.)

gen str_dateintv = cond(missing(dateintv), "", string(dateintv))
gen n_str = length(str_dateintv)

* month = everything before the last two characters; day = the last two
gen month = substr(str_dateintv, 1, n_str - 2) if inlist(n_str, 3, 4)
gen day   = substr(str_dateintv, n_str - 1, 2) if inlist(n_str, 3, 4)

* January interviews of the 2004 sample are dated January 2005
gen yy = cond(year == 2004 & month == "1", 2005, year)

* assemble "D/M/Y" text and convert it to a Stata daily date
gen svydate  = day + "/" + month + "/" + string(yy)
gen svydate2 = date(svydate, "DMY")
format svydate2 %td

* ----------- respondent baseline characteristics
* Every r_* variable either copies a GSS item or turns it into a 0/1 indicator;
* the indicator is left missing whenever its source item is missing.
gen r_age    = age  if !missing(age)
gen r_female = cond(missing(sex), ., sex == 2)
gen r_educ   = educ if !missing(educ)

gen r_race = race if !missing(race)
	gen r_white  = (race == 1)        if !missing(race)
	gen r_black  = (race == 2)        if !missing(race)
	gen r_others = inlist(race, 3, 4) if !missing(race)

gen r_married = (marital == 1) if !missing(marital)

gen r_nchilds = childs if !missing(childs)

* work status collapsed to three groups:
*   codes 1-4, 6, 8, 9 -> 0 ; code 5 -> 1 ; code 7 -> 2
gen r_wrkstat = wrkstat
	recode r_wrkstat (1/4 6 8 9 = 0) (5 = 1) (7 = 2)

* voluntary associational memberships
gen r_memnum   = memnum
gen r_nomember = (memnum == 0) if !missing(memnum)

* party identification and political ideology
gen r_partyid  = partyid  if !missing(partyid)
gen r_ideology = polviews if !missing(polviews)

* other political information: items are reversed by subtracting from 4;
* r_pol_interest0 divides the reversed interest score by 3
gen r_pol_interest = 4 - polint1
	gen r_pol_interest0 = r_pol_interest / 3
gen r_pol_discuss = 4 - discpol


* ----------- network information : important matters
* network size is the number of names given (numgiven)
gen n_size = numgiven if !missing(numgiven)

* ----------- contacts: overall count plus one copy per contact item
gen r_contact = numcntct
foreach v of varlist inperson byphone letters meetings byemail {
	gen r_`v' = `v'
}

* ----------- survey designs
* wtall = 1/adults for survey years before 2004, wtssnr otherwise.
* NOTE(review): the original comment describes "adults" as the pre-2004 weight,
* while the code uses its reciprocal -- confirm the intended direction against
* the GSS weighting documentation before relying on wtall.
gen p_adults = 1/adults
gen wtall = cond(year < 2004, p_adults, wtssnr)

* ----------- interview related variables
* ----- interviewer characteristics
gen i_female = intsex == 2 if ~missing(intsex)
gen i_age = intage

* interviewer race: codes 3-5 are pooled into a single category (3)
recode intethn (1=1) (2=2) (3/5=3), gen(i_race)
	gen i_black = i_race == 2 if ~missing(i_race)
	gen i_white = i_race == 1 if ~missing(i_race)
	gen i_others = i_race == 3 if ~missing(i_race)
gen i_tenure = intyrs

gen i_uncoop = coop
gen i_poorcomprend = comprend

* 1 if saqgene equals 3, 0 for any other nonmissing answer
gen i_saq_skip_gene = (saqgene == 3) if ~missing(saqgene)

* Running count, within year x interviewer, of skips recorded in that
* interviewer's EARLIER interviews (ordered by interview date, then id).
* The within-group order is spelled out in the bysort itself so the result
* does not depend on a separate preceding sort. The first interview of each
* interviewer refers to observation 0, which sum() treats as zero.
bysort year intid (svydate2 id): gen i_numskip = sum(i_saq_skip_gene[_n-1])

* collapse to 0 / 1 / 2-3 / 4+ prior skips; "max" keeps the top category open
* instead of relying on a hard-coded upper bound
recode i_numskip (3=2) (4/max=3), gen(i_numskip_c)

gen i_phonemode = mode==2 if ~missing(mode)
gen i_length = lngthinv

* ----------- detect outlier interviewers over-reporting social isolation!
* For every interviewer-year (nintid): count the interviews that have a
* network-size answer and how many of those report zero names, then compute the
* upper-tail binomial probability of observing at least that many zero-name
* interviews under reference rates of 0.12 and 0.23. Interviewers whose tail
* probability is below 0.01 are flagged (i_over12 / i_over23).
gen one = 1

* NOTE(review): year*100+intid is guaranteed unique only while intid < 100 --
* confirm the range of intid, otherwise interviewers from different years
* could share an nintid.
gen nintid = year*100+intid
gen isolated = (n_size==0) if ~missing(numgiven) & ~missing(nintid)

* interviewer-level aggregates; the if qualifier precedes the options
* (documented egen syntax) and total() replaces the legacy sum() function
egen p_isolated  = mean(isolated)  if ~missing(n_size) & ~missing(nintid), by(nintid)
egen n_interview = total(one)      if ~missing(n_size) & ~missing(nintid), by(nintid)
egen n_isolated  = total(isolated) if ~missing(n_size) & ~missing(nintid), by(nintid)

* probability of over-reporting social isolation
gen bp12_isolated = binomialtail(n_interview, n_isolated, 0.12) if ~missing(n_size) & ~missing(nintid)
gen bp23_isolated = binomialtail(n_interview, n_isolated, 0.23) if ~missing(n_size) & ~missing(nintid)

* check!
*bitest isolated == 0.12 if nintid == 200415
*tab bp12_isolated if nintid == 200415

* flags: 0 by default where interviewer id and numgiven are both present,
* 1 where the tail probability is below 0.01
gen i_over12 = 0 if ~missing(intid) & ~missing(numgiven)
	replace i_over12 = 1 if bp12_isolated < 0.01

gen i_over23 = 0 if ~missing(intid) & ~missing(numgiven)
	replace i_over23 = 1 if bp23_isolated < 0.01
	


* ----------- detect outlier interviewers over-reporting membership isolation!
* Same procedure as for social isolation, applied to "no memberships"
* (memnum == 0) with a reference rate of 0.25.
gen nomembership = (memnum==0) if ~missing(memnum) & ~missing(nintid)

* interviewer-level aggregates; the if qualifier precedes the options
* (documented egen syntax) and total() replaces the legacy sum() function
egen p_nomembership = mean(nomembership)  if ~missing(memnum) & ~missing(nintid), by(nintid)
egen n_interview2   = total(one)          if ~missing(memnum) & ~missing(nintid), by(nintid)
egen n_nomembership = total(nomembership) if ~missing(memnum) & ~missing(nintid), by(nintid)

* upper-tail probability of at least n_nomembership "no membership" interviews
gen bp25_nomember = binomialtail(n_interview2, n_nomembership, 0.25) if ~missing(memnum) & ~missing(nintid)

* flag interviewers whose tail probability is below 0.01
gen i_mover25 = 0 if ~missing(intid) & ~missing(memnum)
	replace i_mover25 = 1 if bp25_nomember < 0.01


* tag the source, then keep identifiers, design variables and the
* constructed network (n_size, isolated, nomembership), r_* and i_* measures
gen dataset = "GSS"

keep id dataset sample vpsu vstrat sampcode wtssnr wtssall wtall adults ///
	oversamp formwt year svydate2 n_size isolated nomembership r_* i_* intid

* restrict to the 1985, 1987 and 2004 surveys and store the result
keep if inlist(year, 1985, 1987, 2004)
save gss_data1, replace

* ================================================
* GSS 2010
* ================================================
use GSS_panel06w123_R3, clear

* --- data is collected at 2010 (wave3)
gen id = id_1
gen id3 = id_3

* ---------- survey timing
gen year = year_3
recode dateintv_3 (missing=.), gen(dateintv)

gen str_dateintv = string(dateintv)
replace str_dateintv = "" if str_dateintv == "."

* Split the packed date by its length so that both 3-character (M-DD) and
* 4-character (MM-DD) values are parsed correctly; taking a fixed 1-character
* month would silently mis-date any interview held in months 10-12.
* Values of any other length get an empty month/day and hence a missing date.
gen month = substr(str_dateintv, 1, length(str_dateintv) - 2) if inlist(length(str_dateintv), 3, 4)
gen day   = substr(str_dateintv, length(str_dateintv) - 1, 2) if inlist(length(str_dateintv), 3, 4)

* assemble "D/M/Y" text and convert it to a Stata daily date
gen svydate = day+"/"+month+"/"+string(year)
gen svydate2 = date(svydate, "DMY")
format svydate2 %td

* ----------- respondent baseline characteristics (wave-3 items, suffix _3)
gen r_age    = age_3  if !missing(age_3)
gen r_female = cond(missing(sex_3), ., sex_3 == 2)
gen r_educ   = educ_3 if !missing(educ_3)
gen r_race   = race_3 if !missing(race_3)

* one 0/1 indicator per race code: 1 -> r_white, 2 -> r_black, 3 -> r_others
local code = 0
foreach grp in white black others {
	local ++code
	gen r_`grp' = (r_race == `code') if !missing(r_race)
}

gen r_married = (marital_3 == 1) if !missing(marital_3)
gen r_nchilds = childs_3 if !missing(childs_3)

* work status collapsed to three groups:
*   codes 1-4, 6, 8, 9 -> 0 ; code 5 -> 1 ; code 7 -> 2
gen r_wrkstat = wrkstat_3
	recode r_wrkstat (1/4 6 8 9 = 0) (5 = 1) (7 = 2)

gen r_partyid  = partyid_3  if !missing(partyid_3)
gen r_ideology = polviews_3 if !missing(polviews_3)

* NOTE(review): political interest is read from polint1_2 (suffix _2) while
* every other item in this section uses suffix _3 -- confirm this is intended.
gen r_pol_interest = 4 - polint1_2
	gen r_pol_interest0 = r_pol_interest / 3

* ----------- network information
gen n_size   = numgiven_3 if !missing(numgiven_3)
gen numgiven = numgiven_3

* voluntary association membership
* the sixteen membership items, listed once and reused below
local memitems memfrat memserv memvet mempolit memunion memsport memyouth ///
	memschl memhobby memgreek memnat memfarm memlit memprof memchurh memother

* copy each wave-3 item to an unsuffixed variable
foreach item of local memitems {
	gen `item' = `item'_3
}

* code 2 becomes 0; codes 8 and 9 become missing
recode `memitems' (2=0) (8 9=.)

* memnum = row sum of the items; set to missing when no item was answered
egen memnum = rowtotal(`memitems')
egen nonmissing_memnum = rownonmiss(`memitems')
replace memnum = . if nonmissing_memnum == 0

gen r_memnum   = memnum
gen r_nomember = (memnum == 0) if !missing(memnum)


* ----------- interviewer related variables (wave-3 items, suffix _3)
gen intid  = intid_3
gen nintid = intid

gen i_female = cond(missing(intsex_3), ., intsex_3 == 2)
gen i_age    = intage_3

* interviewer race: codes 3-5 are pooled into a single category (3),
* followed by one 0/1 indicator per category
recode intethn_3 (1=1) (2=2) (3/5=3), gen(i_race)
foreach spec in "black 2" "white 1" "others 3" {
	local grp  : word 1 of `spec'
	local code : word 2 of `spec'
	gen i_`grp' = (i_race == `code') if !missing(i_race)
}
gen i_tenure = intyrs_3

gen i_phonemode = (mode_3 == 2) if !missing(mode_3)
gen i_length    = lngthinv_3

gen i_uncoop       = coop_3
gen i_poorcomprend = comprend_3
gen i_ballot       = ballot


* ----------- detect outlier interviewers over-reporting social isolation!
* For every interviewer (nintid): count the interviews that have a
* network-size answer and how many of those report zero names, then compute the
* upper-tail binomial probability of observing at least that many zero-name
* interviews under reference rates of 0.12 and 0.23. Interviewers whose tail
* probability is below 0.01 are flagged (i_over12 / i_over23).
gen one = 1
gen isolated = (n_size==0) if ~missing(n_size) & ~missing(nintid)

* interviewer-level aggregates; the if qualifier precedes the options
* (documented egen syntax) and total() replaces the legacy sum() function
egen p_isolated  = mean(isolated)  if ~missing(n_size) & ~missing(nintid), by(nintid)
egen n_interview = total(one)      if ~missing(n_size) & ~missing(nintid), by(nintid)
egen n_isolated  = total(isolated) if ~missing(n_size) & ~missing(nintid), by(nintid)

* probability of over-reporting social isolation
gen bp12_isolated = binomialtail(n_interview, n_isolated, 0.12) if ~missing(n_size) & ~missing(nintid)
gen bp23_isolated = binomialtail(n_interview, n_isolated, 0.23) if ~missing(n_size) & ~missing(nintid)

gen i_over12 = 0 if ~missing(intid) & ~missing(numgiven)
	replace i_over12 = 1 if bp12_isolated < 0.01

gen i_over23 = 0 if ~missing(intid) & ~missing(numgiven)
	replace i_over23 = 1 if bp23_isolated < 0.01


* ----------- detect outlier interviewers over-reporting membership isolation!
* Same procedure for "no memberships" (memnum == 0); tail probabilities are
* computed for reference rates 0.25, 0.30 and 0.35, and the flag i_mover25
* uses the 0.25 rate.
gen nomembership = (memnum==0) if ~missing(memnum) & ~missing(nintid)
egen p_nomembership = mean(nomembership)  if ~missing(memnum) & ~missing(nintid), by(nintid)
egen n_interview2   = total(one)          if ~missing(memnum) & ~missing(nintid), by(nintid)
egen n_nomembership = total(nomembership) if ~missing(memnum) & ~missing(nintid), by(nintid)

gen bp25_nomember = binomialtail(n_interview2, n_nomembership, 0.25) if ~missing(memnum) & ~missing(nintid)
gen bp30_nomember = binomialtail(n_interview2, n_nomembership, 0.3) if ~missing(memnum) & ~missing(nintid)
gen bp35_nomember = binomialtail(n_interview2, n_nomembership, 0.35) if ~missing(memnum) & ~missing(nintid)

gen i_mover25 = 0 if ~missing(intid) & ~missing(memnum)
	replace i_mover25 = 1 if bp25_nomember < 0.01


* tag the source, keep identifiers, design variables and constructed
* measures, and store the 2010 panel extract
gen dataset = "GSS"

keep id id_3 dataset ballot vpsu vstrat sampcode wtpan123 oversamp formwt ///
	r_* year svydate2 n_size nomembership memnum intid i_*

save gss_data2, replace

* ================================================
* ANES 2000
* ================================================
use anes2000TS, clear

* ---------- survey timing
* V000130 is handled as text: "0000" is blanked, the first two characters
* are taken as the month and the next two as the day; the year is fixed at 2000.
gen time = V000130
replace time = "" if time == "0000"

gen month = substr(time, 1, 2)
gen day   = substr(time, 3, 2)

gen year = 2000

* assemble "D/M/Y" text and convert it to a Stata daily date
gen svydate  = day + "/" + month + "/" + string(year)
gen svydate2 = date(svydate, "DMY")
format svydate2 %td


* ---------- respondent characteristics
gen id = V000001

gen r_age    = V000908 if !missing(V000908)
gen r_female = cond(missing(V001029), ., V001029 == 2)

* degree category (code 9 -> missing), then mapped to a years-type scale:
* 1->8, 2->11, 3->12, 4->13, 5->14, 6->16, 7->18
gen r_degree = V000913
	replace r_degree = . if r_degree == 9
	recode r_degree (1=8) (2=11) (3=12) (4=13) (5=14) (6=16) (7=18), gen(r_educ)

* race: V000066 is used first (1, 2, or 3 for values strictly between 2 and 8);
* cases still missing are filled from V001030 (1, 2, or 4 when it equals 0)
gen r_race = .
replace r_race = V000066 if inlist(V000066, 1, 2)
replace r_race = 3       if V000066 > 2 & V000066 < 8
replace r_race = V001030 if missing(r_race) & inlist(V001030, 1, 2)
replace r_race = 4       if missing(r_race) & V001030 == 0 // not specified : phone-mode

gen r_white  = (r_race == 1) if !missing(r_race)
gen r_black  = (r_race == 2) if !missing(r_race)
gen r_others = (r_race == 3) if !missing(r_race)
gen r_racena = (r_race == 4) if !missing(r_race)

gen r_married = (V000909 == 1) if V000909 > 0 & V000909 < 8

* number of children: 0 when V001023 == 5, otherwise V001024 when below 90
gen r_nchilds = cond(V001023 == 5, 0, cond(V001024 < 90, V001024, .))

* work status, highest-priority rule first:
*   2 if either V001676a/b equals 5
*   1 if either equals 3
*   0 if V001675 == 1 or either equals 1, 2, 4 or 6
*   3 for everything else
gen r_wrkstat = cond(V001676a == 5 | V001676b == 5, 2, ///
	cond(V001676a == 3 | V001676b == 3, 1, ///
	cond(V001675 == 1 | inlist(V001676a, 1, 2, 4, 6) | inlist(V001676b, 1, 2, 4, 6), 0, 3)))

* political interest: reversed (5 - item) and divided by 4, for items above 0
gen r_pol_interest0 = (5 - V001201) / 4 if V001201 > 0

gen r_partyid  = V000523 if V000523 != 9
gen r_ideology = V000446 if V000446 > 0 & V000446 < 8



* ---------------- interview related variable
gen intid  = V000070
gen nintid = intid

* ---------------- survey designs
gen wt_sample = V000002
gen wt_post   = V000002a
gen iw_mode   = V000004
gen sampcode  = V000097

* sampcode is split as text: for 2- or 3-character codes, vstrat takes all but
* the last character and vpsu takes the last character; other lengths stay empty
gen s_sampcode = string(sampcode)
gen size_samp  = length(s_sampcode)

gen vstrat = substr(s_sampcode, 1, size_samp - 1) if inlist(size_samp, 2, 3)
gen vpsu   = substr(s_sampcode, size_samp, 1)     if inlist(size_samp, 2, 3)

* convert both pieces back to numeric variables
destring vstrat vpsu, replace

* ----------------- network information
* n_size is defined only where V001699 is nonmissing and above 0.
* Rules are listed from highest to lowest priority:
*   4 if V001702 == 1 ; 3 if V001702 == 5 ; 2 if V001701 == 5 ;
*   1 if V001700 == 5 ; otherwise 0
gen n_size = cond(V001702 == 1, 4, ///
	cond(V001702 == 5, 3, ///
	cond(V001701 == 5, 2, ///
	cond(V001700 == 5, 1, 0)))) if V001699 > 0 & !missing(V001699)


* tag the source, keep identifiers, design variables and constructed measures
gen dataset = "ANES"
keep id dataset sampcode vpsu vstrat wt_sample wt_post iw_mode year svydate2 ///
	intid r_* n_size

save anes_2000, replace


* ================================================
* ANES 2006
* ================================================
* NOTE : respondent characteristics are taken from ANES 2004 here and saved
* so that they can be merged onto the ANES 2006 records below
use anes2004TS, clear
gen mid = V040001

* race: 50 -> 1, 10 -> 2, 88/89 -> missing, every other NONMISSING code -> 3.
* "nonmissing" is used instead of "else" because "else" also matches missing
* values and would silently code respondents without a race answer as 3.
recode V043299 (50=1) (10=2) (88 89=.) (nonmissing=3), gen(r_race)

	gen r_white = r_race == 1 if ~missing(r_race)
	gen r_black = r_race == 2 if ~missing(r_race)
	gen r_others = r_race == 3 if ~missing(r_race)

gen r_married = V043251 == 1 if V043251 < 8

* degree category (code 9 -> missing), then mapped to a years-type scale:
* 1->8, 2->11, 3->12, 4->13, 5->14, 6->16, 7->18
gen r_degree = V043254
	replace r_degree = . if r_degree == 9
	recode r_degree (1=8) (2=11) (3=12) (4=13) (5=14) (6=16) (7=18), gen(r_educ)

* keep only the merge key and the constructed respondent variables
keep mid r_*

save anes2004_merge, replace

* ---------------- ANES 2006 data
use anes2006pilot, clear
gen id  = V06P001
gen mid = id

* attach the 2004 respondent characteristics; only records found in both
* files are retained
merge 1:1 mid using anes2004_merge, gen(m_2004) keep(match)
drop mid

* ------- survey timing
* the date text comes from V06P202c when it is non-empty, else from V06P201c;
* its first two characters are the month and the next two the day
gen year = 2006
gen str_dateintv = cond(V06P202c != "", V06P202c, V06P201c)

gen month = substr(str_dateintv, 1, 2)
gen day   = substr(str_dateintv, 3, 2)

* interviews with month "01" are dated 2007
gen yy = cond(year == 2006 & month == "01", 2007, year)

* assemble "D/M/Y" text and convert it to a Stata daily date
gen svydate  = day + "/" + month + "/" + string(yy)
gen svydate2 = date(svydate, "DMY")
format svydate2 %td

* ------- respondent characteristics
gen r_female = (V06P005 == 2) if !missing(V06P005)
gen r_age    = V06P006

* party identification; code 9 is set to missing
gen r_partyid = V06P680 if V06P680 != 9

* ------- survey designs
gen wtall    = V06P002
gen sampcode = V06P007b
gen vpsu     = V06P007a
gen intid    = V06P301

* ----------- network information
* network size; code 99 is set to missing
gen module = V06P406
gen n_size = V06P588 if V06P588 != 99

gen dataset = "ANES"

* r_partyid is already covered by the r_* wildcard
keep id sampcode vpsu wtall module intid r_* svydate2 year dataset n_size

save anes_2006, replace

* ================================================
* merge all data set
* ================================================
* stack the four prepared files, starting from the GSS cross-section extract
use gss_data1, clear
foreach part in gss_data2 anes_2000 anes_2006 {
	append using `part'
}

* unified weight: wtall for GSS before 2010 and for ANES 2006,
* wtpan123 for GSS 2010, wt_sample for ANES 2000
gen wt = .
replace wt = wtall     if (dataset == "GSS" & year < 2010) | (dataset == "ANES" & year == 2006)
replace wt = wtpan123  if dataset == "GSS" & year == 2010
replace wt = wt_sample if dataset == "ANES" & year == 2000

* adjust network sizes; top-coding;
* n_size6: sizes of 6 or more, and size 4 in year 2000, are all set to 6.5
gen n_size6 = cond((n_size >= 6 & !missing(n_size)) | (year == 2000 & n_size == 4), 6.5, n_size)

* n_size4: sizes above 4 are capped at 4
gen n_size4 = cond(n_size > 4 & !missing(n_size), 4, n_size)

* one indicator (ns1, ns2, ...) per distinct value of n_size6
tab n_size6, gen(ns)

* isolation indicator: 1 when network size is zero
gen ni = (n_size == 0) if !missing(n_size)

*------------------------------- variable labels
* Attach variable and value labels to the combined file, then save it in
* Stata 12 format.
label var r_age "Age"
label var r_educ "Education"
label var r_female "Female"
label var r_race "Race"
	label define r_race 1 "White" 2 "Black" 3 "Other" 4 "missing"
	label value r_race r_race

label var r_married "Currently Married"
label var r_nchilds "Number of Children"

label var n_size "Network Size"
label var n_size6 "Network Size"

label var ni "Isolation"

label var r_wrkstat "Working Status"
	label define r_wrkstat 0 "Employed/Looking" 1 "Retired" 2 "Homemaker"
	label value r_wrkstat r_wrkstat

label var i_age "Interviewer : Age"
label var i_female "Interviewer : Female"
label var i_race "Interviewer : Race"
label define i_race 1 "Interviewer:White" 2 "Interviewer:Black" 3 "Interviewer:Other"
	label value i_race i_race

label var i_tenure "Interviewer: Experience (Years)"
label var i_phonemode "Telephone Mode"
label var i_uncoop "Uncooperativeness"
label var i_poorcomprend "Poor Comprehension"

* r_nomember equals 1 when the respondent reports zero memberships, so the
* label must describe NON-membership (the previous wording was inverted)
label var r_nomember "No Voluntary Association Membership"

label var i_numskip_c "Prior SAQ Skips"
label define i_numskip_c 0 "SAQ Skips (None)" 1 "SAQ: 1 Prior Skip" 2 "SAQ: 2 to 3 Prior Skips" 3 "SAQ: 4+ Prior Skips"
	label value i_numskip_c i_numskip_c
label define ballot 1 "Ballot One (early position)" 2 "Ballot Two (after voluntary membership)" 3 "Ballot Three (end position)"
	label value ballot ballot

saveold imp_combine.dta, version(12) replace



